#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <atomic>
#include <chrono>
#include <mutex>
#include <string>
#include <thread>
#ifdef WIN32
#include <io.h>
#include <fcntl.h>
#endif
// Id of this module instance; filled from the --instance command-line option.
int instance{0};
// Subject id expected on incoming frames (--data_in).
int sub_input{0};
// Subject id stamped on outgoing frames (--data_out).
int sub_output{0};
// Counter shared by the sender thread and the receive loop: raised for each
// frame the sender writes, lowered for each frame that arrives carrying this
// instance's id. A negative value makes the sender thread leave its loop.
std::atomic<int> way_count{0};
using namespace std;
// Payload size, in bytes, of every data frame (65536 minus 16).
const int n_len = 65536 - 16;
// Guards stdout so that frames written by different threads do not interleave.
std::mutex mtx;
int main(int argc, char * argv[])
{
	std::this_thread::sleep_for(std::chrono::milliseconds(2000));
	//In windows, stdio must be set to BINARY mode, to
	//prevent linebreak \\n\\r replace.
#ifdef WIN32
	setmode(fileno(stdout),O_BINARY);
	setmode(fileno(stdin),O_BINARY);
#endif
	bool bInfo = false, finished = false;
	string function;
	//1. parse cmdline
	for (int i=1;i<argc;++i)
	{
		string arg_key = argv[i], arg_value = argv[i];
		int idx = arg_key.find('=');
		if (idx>=0 && idx<arg_key.size())
		{
			arg_key = arg_key.substr(0,idx);
			arg_value = arg_value.substr(idx+1);
		}
		if (arg_key=="--function")
			function = arg_value;
		else if (arg_key=="--information")
			bInfo = true;
		else if (arg_key=="--instance")
			instance = atoi(arg_value.c_str());
		else if (arg_key=="--data_in")
			sub_input = atoi(arg_value.c_str());
		else if (arg_key=="--data_out")
			sub_output = atoi(arg_value.c_str());
		fprintf(stderr,"%s:%s\n",arg_key.c_str(),arg_value.c_str());
		fflush(stderr);
	}
	//2. function case
	if (bInfo)
	{
		//In this example, json file will be published with exe file.
		//We will return directly.  Or, you can output json here to stdout,
		//If you do not want to publish your json file.
		return 0;
	}
	else if (instance<=0 || function.length()==0)
		return -1;
	else
	{
		std::thread th_send([&]()->void {
			if (sub_output > 0)
			{
				int thd = 10;
				while (way_count >= 0)
				{
					if (way_count < thd )
					{
						static char buf_header[4] = { 0x3c,0x5a,0x7e,0x69 };
						static char data[n_len]{ 0 };
						static clock_t* clk = (clock_t*)&data[0];
						static long long* cnt = (long long*)&data[8];
						*clk = clock();
						mtx.lock();
						fwrite(buf_header, 1, 4, stdout);
						fwrite(&sub_output, sizeof(int), 1, stdout);
						fwrite(&instance, sizeof(int), 1, stdout);
						fwrite(&n_len, sizeof(int), 1, stdout);
						fwrite(data, sizeof(char), n_len, stdout);
						++way_count;
						++(*cnt);
						fflush(stdout);
						mtx.unlock();
					}
					else
					{
						std::this_thread::sleep_for(std::chrono::milliseconds(1));
						if (way_count ==0 && thd <1000 )
							++thd; 
					}				
				}
			}
		});
		long long recvcnt = 0;
		long long delay = 0;
		clock_t first_clk = 0;
		while(false==finished)
		{
			static char header[4], data[n_len]{ 0 };
			static clock_t* clk = (clock_t*)&data[0];
			static long long* cnt = (long long*)&data[8];
			int n_sub = 0, n_path = 0, len = 0;
			fread(header,1,4,stdin);	//2.1 read header
			if (header[0]!=0x3C || header[1]!=0x5A || header[2]!=0x7E || header[3]!=0x69)
			{
				fprintf(stderr,"BAD HEADER\n");
				fflush(stderr);

				continue;
			}
			fread(&n_sub,sizeof(int),1,stdin);
			fread(&n_path,sizeof(int),1,stdin);
			fread(&len,sizeof(int),1,stdin);
			if (len < 0 || len != n_len || n_sub <= 0)
			{
				int rflen = len;
				while (rflen > 0)
				{
					int rdlen = n_len;
					if (rdlen > rflen)
						rdlen = rflen;
					fread(data, sizeof(char), rdlen, stdin);
					rflen -= rdlen;
				}				
				if (strstr(data, "function=quit;") != nullptr)
				{
					finished = true;
					continue;
				}
			}
			else
			{
				fread(data, sizeof(char), n_len, stdin);
				if (n_sub != sub_input)
				{
					fprintf(stderr,"BAD SUBJECT\n");
					fflush(stderr);
					continue;
				}

				if (n_path != instance)
				{
					mtx.lock();
					fwrite(header, 1, 4, stdout);
					fwrite(&sub_output, sizeof(int), 1, stdout);
					fwrite(&n_path, sizeof(int), 1, stdout);
					fwrite(&n_len, sizeof(int), 1, stdout);
					fwrite(data, sizeof(char), n_len, stdout);
					fflush(stdout);
					mtx.unlock();
				}
				else
				{
					--way_count;
					if (recvcnt == 0)
						first_clk = *clk;
					++recvcnt;
					delay += clock() - *clk;
					if (recvcnt >= 10000)
					{
						recvcnt = 0;
						delay /= 10000;
						long long total_bytes = 65536 * 10000;
						double tmCost = (*clk - first_clk) * 1.0 / CLOCKS_PER_SEC+1e-10;
						double speed = total_bytes*1.0/1024/1024/tmCost;
						fprintf(stderr, "Cnt = %d, Average delay = %d clocks, total Speed = %.2lf MB/s.\n", *cnt,(int)delay,speed);
						fflush(stderr);
					}
				}
			}
			

		}
		way_count = -2;
		th_send.join();
	}
	//3.exit
	return 0;
}
